package com.six.compress.old;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.orc.CompressionKind;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
import org.apache.orc.storage.ql.exec.vector.DoubleColumnVector;
import org.apache.orc.storage.ql.exec.vector.LongColumnVector;
import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;

import java.util.Random;

/**
 * Test file: 352.9MB
 * Test 1
 * 10000 rows, 28001 float columns
 * none (no compression)
 * current: row=>9999 cost=>43451  avg=>4ms/row
 * cost=>43
 * file: 128.5MB
 * snappy
 * current: row=>9999 cost=>44436  avg=>4ms/row
 * cost=>44
 * file: 11.2MB
 * zlib
 * current: row=>9999 cost=>47952  avg=>4ms/row
 * cost=>48
 * file: 5.6MB
 * Test 2
 * 3000 columns
 * current: row=>9999 cost=>5980  avg=>0ms/row
 * file: 3.2MB
 * cost=>6
 * 6s / 10000 rows + zlib, avg 0.6ms/row
 */
@SuppressWarnings("all")
public class ORCWriterFloatTest {

    /** Rows written per flushed batch. */
    private static final int FLUSH_EVERY = 100;

    /**
     * Writes a benchmark ORC file with a wide schema
     * (1 long "time" column, 3000 float columns, 24999 boolean columns)
     * using ZLIB compression, flushing a row batch every {@value #FLUSH_EVERY} rows.
     *
     * @param args unused
     * @throws Exception if schema construction or file writing fails
     */
    public static void main(String[] args) throws Exception {

        // Define the ORC schema, i.e. the table structure.
        TypeDescription schema = TypeDescription.createStruct();
        schema.addField("time", TypeDescription.createLong());
        for (int i = 1; i < 3001; i++) {
            schema.addField("p" + i, TypeDescription.createFloat());
        }
        for (int i = 3001; i < 28000; i++) {
            schema.addField("p" + i, TypeDescription.createBoolean());
        }

        // Local absolute path of the output ORC file.
        String orcFilePath = "/home/hdfs/data/fly_param_float_tinyint_3000_zlib_4.orc";
        Configuration conf = new Configuration();
        FileSystem.getLocal(conf);

        Writer writer = OrcFile.createWriter(
                new Path(orcFilePath),
                OrcFile.writerOptions(conf)
                        .setSchema(schema)
//                            .stripeSize(1024)
//                            .bufferSize(1024)
//                            .blockSize(1024)
                        .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
                        .compress(CompressionKind.ZLIB)
                        .version(OrcFile.Version.V_0_12));

        // Ensure the writer is always closed, even if writing fails mid-way;
        // the previous version leaked the writer on exception.
        try {
            long start = System.currentTimeMillis();
            Random random = new Random(1000000000);
            int maxRows = 10000;

            VectorizedRowBatch batch = schema.createRowBatch(maxRows);

            for (int i = 0; i < maxRows; i++) {
                // BUG FIX: index the column vectors with the position inside the
                // current batch, not the global loop counter. After batch.reset()
                // the batch restarts at row 0, but the old code kept writing at
                // index i, so every flushed batch after the first contained
                // unwritten (stale) rows.
                int row = batch.size++;
                ((LongColumnVector) batch.cols[0]).vector[row] = start + i;
                for (int j = 1; j < 3001; j++) {
                    ((DoubleColumnVector) batch.cols[j]).vector[row] = random.nextFloat();
                }
                for (int k = 3001; k < 28000; k++) {
                    ((LongColumnVector) batch.cols[k]).vector[row] = 1;
                }
                if (batch.size == FLUSH_EVERY) {
                    writer.addRowBatch(batch);
                    batch.reset();
                    long end = System.currentTimeMillis();
                    // Average over the number of rows written so far (i + 1).
                    System.out.println("current: row=>" + i + " cost=>" + (end - start)
                            + "  avg=>" + (end - start) / (i + 1) + "ms/row");
                }
            }
            // Flush any trailing partial batch (none when maxRows is a
            // multiple of FLUSH_EVERY, but guard anyway).
            if (batch.size > 0) {
                writer.addRowBatch(batch);
            }
            long end = System.currentTimeMillis();
            System.out.println("cost=>" + (end - start) / 1000);
        } finally {
            writer.close();
        }
    }

}
